"""Entry point for the fractal‑pivot calibration pipeline.

This script orchestrates the entire end‑to‑end analysis of several
datasets.  For each input file it performs the following steps:

1. Load and preprocess the data (unzip archives, drop non‑numeric
   columns, etc.).
2. Compute the raw box‑counting dimension estimates over seven
   scales using the ``boxcount`` module.
3. Fit the logistic pivot model to these estimates and compute the
   goodness of fit.
4. Save the raw dimensions to CSV and the fitted pivot plot to PNG.
5. Append a row to a summary table with the fitted parameters and
   number of points.

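The results are written to the ``results/`` directory located next to
``data/`` in the parent of this script's directory.  The expected
outputs are ``*_D_raw.csv`` and ``*_pivot.png`` for each dataset,
along with a ``summary.csv`` collecting all model parameters.  Input
files that are missing from ``data/`` are skipped with a warning.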
"""

from __future__ import annotations

import csv
import os
from pathlib import Path

import numpy as np

from data_loader import load_dataset
from boxcount import box_count_dimension
from pivot_fit import fit_logistic
from plot_utils import plot_pivot


def process_dataset(data_path: Path, dataset_key: str, results_dir: Path) -> dict:
    """Process a single dataset and write raw/pivot outputs.

    The dataset is loaded, raw box-counting dimensions are computed for
    scales 1 through 7 and written to ``<dataset_key>_D_raw.csv``, a
    logistic pivot model is fitted to those estimates, and the fit is
    plotted to ``<dataset_key>_pivot.png``.

    Parameters
    ----------
    data_path : Path
        Path to the input data file.
    dataset_key : str
        Identifier used in output filenames (e.g. "barnsley").
    results_dir : Path
        Directory where outputs should be saved.  It is created
        (including any missing parents) if it does not exist yet.

    Returns
    -------
    dict
        Summary information for this dataset with the keys
        ``'dataset'``, ``'k'``, ``'n0'``, ``'R2'`` and ``'N_points'``.
    """
    # Make sure the output location exists before doing any expensive
    # work, so a direct call with a fresh directory does not fail at the
    # very end when the first output file is opened.
    results_dir.mkdir(parents=True, exist_ok=True)

    print(f"Loading dataset: {data_path}")
    points = load_dataset(data_path)
    # NOTE(review): unpacking assumes load_dataset returns a 2-D array of
    # shape (samples, features) -- confirm against data_loader.
    n_points, n_features = points.shape
    print(f"Loaded {n_points} samples with {n_features} feature(s)")

    # Compute raw dimensions
    n_vals, D_vals = box_count_dimension(points, scales=range(1, 8))

    # Save raw dimensions to CSV.  The encoding is pinned so the file is
    # byte-identical regardless of the platform's default locale.
    raw_csv_path = results_dir / f"{dataset_key}_D_raw.csv"
    with open(raw_csv_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(["n", "D_raw"])
        for n, D in zip(n_vals, D_vals):
            # Cast to builtin types so the CSV holds plain numbers.
            writer.writerow([int(n), float(D)])
    print(f"Wrote raw dimensions to {raw_csv_path}")

    # Fit logistic model
    k, n0, r2, pred = fit_logistic(n_vals, D_vals)

    # Save pivot plot (the path is passed to plot_pivot as a string)
    pivot_png_path = results_dir / f"{dataset_key}_pivot.png"
    plot_pivot(n_vals, D_vals, pred, k, n0, r2, dataset_key, str(pivot_png_path))
    print(f"Saved pivot plot to {pivot_png_path}")

    return {
        'dataset': dataset_key,
        'k': k,
        'n0': n0,
        'R2': r2,
        'N_points': n_points,
    }


def main() -> None:
    """Run the calibration pipeline over every expected dataset file.

    Inputs are looked up in the ``data`` directory and outputs are
    written to the ``results`` directory, both located in the parent of
    this file's directory.  Files that do not exist are reported and
    skipped.  Afterwards ``summary.csv`` is written with a header and
    one row per dataset that was processed.
    """
    project_root = Path(__file__).resolve().parent.parent
    data_dir = project_root / "data"
    results_dir = project_root / "results"
    results_dir.mkdir(exist_ok=True)

    # (input filename, dataset key) pairs, in processing order.
    expected_inputs = (
        ("barnsley_fern.csv", "barnsley"),
        ("sierpinski_triangle.csv", "sierpinski"),
        ("points_neuro.csv", "neuro"),
        ("galaxies_balanced_xyz.csv", "galaxy"),
        ("lidar_data.zip", "lidar"),
        ("dem_data.zip", "dem"),
    )
    # Column order of the summary table; also the keys read from each row.
    columns = ["dataset", "k", "n0", "R2", "N_points"]

    summary_rows = []
    for filename, key in expected_inputs:
        data_path = data_dir / filename
        if data_path.exists():
            summary_rows.append(process_dataset(data_path, key, results_dir))
        else:
            print(f"Warning: dataset file {filename} is missing. Skipping.")

    # Write the collected per-dataset parameters as a single table.
    summary_csv_path = results_dir / "summary.csv"
    with summary_csv_path.open("w", newline="") as f:
        table = csv.writer(f)
        table.writerow(columns)
        table.writerows([entry[col] for col in columns] for entry in summary_rows)
    print(f"Saved summary to {summary_csv_path}")

# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()